{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```bash\n",
    "export PATH=${SPARK_HOME}/bin:${PATH}\n",
    "export SPARK_LOCAL_IP=localhost\n",
    "export PYSPARK_DRIVER_PYTHON=jupyter\n",
    "export PYSPARK_DRIVER_PYTHON_OPTS='lab --ip=0.0.0.0 --allow-root --no-browser --NotebookApp.token=\"\"'\n",
    "export PACKAGES=\"com.esri:filegdb:0.12.5\"\n",
    "export PACKAGES=\"${PACKAGES},com.esri:spark-functions:0.10\"\n",
    "pyspark\\\n",
    "  --master local[*]\\\n",
    "  --num-executors 1\\\n",
    "  --driver-memory 30G\\\n",
    "  --executor-memory 30G\\\n",
    "  --conf spark.ui.enabled=false\\\n",
    "  --packages ${PACKAGES}\\\n",
    "  --exclude-packages org.scala-lang:scala-reflect\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "_ = sql(\"select 'Initialize Spark'\").collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark._jvm.com.esri.spark.Functions.registerFunctions()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "gdb = spark._jvm.com.esri.gdb.FileGDB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NameIndex(MiamiExtent,9)\n",
      "NameIndex(Voyage,10)\n",
      "NameIndex(Broadcast,11)\n",
      "NameIndex(Vessel,12)\n",
      "NameIndex(BaseStations,13)\n",
      "NameIndex(AttributeUnits,14)\n",
      "NameIndex(Extent,15)\n"
     ]
    }
   ],
   "source": [
    "gdb_path = os.path.join(\"data\", \"Miami.gdb\")\n",
    "for tab in gdb.listTables(gdb_path):\n",
    "    print(tab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = spark.read \\\n",
    "    .format('com.esri.gdb') \\\n",
    "    .options(path=gdb_path, name='Broadcast') \\\n",
    "    .load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- OBJECTID: integer (nullable = false)\n",
      " |-- Shape: struct (nullable = true)\n",
      " |    |-- x: double (nullable = true)\n",
      " |    |-- y: double (nullable = true)\n",
      " |-- SOG: integer (nullable = true)\n",
      " |-- COG: integer (nullable = true)\n",
      " |-- Heading: integer (nullable = true)\n",
      " |-- ROT: integer (nullable = true)\n",
      " |-- BaseDateTime: timestamp (nullable = true)\n",
      " |-- Status: integer (nullable = true)\n",
      " |-- VoyageID: integer (nullable = true)\n",
      " |-- MMSI: integer (nullable = true)\n",
      " |-- ReceiverType: string (nullable = true)\n",
      " |-- ReceiverID: string (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1365578"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.registerTempTable(\"bc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------+--------------------+----------+------------------+--------------------+----------+------------------+\n",
      "|     mmsi|                 dt1|        x1|                y1|                 dt2|        x2|                y2|\n",
      "+---------+--------------------+----------+------------------+--------------------+----------+------------------+\n",
      "|209570000| 2009-01-03 23:30:00|-79.972952|25.910472999999996|2009-01-03 23:31:...| -79.97176|         25.906058|\n",
      "|209570000|2009-01-03 23:44:...| -79.95738|25.854034999999996| 2009-01-03 23:45:00|-79.956087|25.849666999999997|\n",
      "|209570000| 2009-01-04 00:15:00|-79.923492|25.737345000000005|2009-01-04 00:16:...|-79.922198|25.732974999999996|\n",
      "|367136710| 2009-01-02 22:09:00|-79.844383|25.592617000000004|2009-01-02 22:10:...|-79.843567|25.599082999999993|\n",
      "|367136710|2009-01-02 22:10:...|-79.843567|25.599082999999993|2009-01-02 22:11:...|-79.842817|25.605099999999993|\n",
      "|367136710|2009-01-02 22:11:...|-79.842817|25.605099999999993| 2009-01-02 22:12:00| -79.84205|          25.61125|\n",
      "|367136710| 2009-01-02 22:12:00| -79.84205|          25.61125| 2009-01-02 22:13:00| -79.84115|25.618233000000004|\n",
      "|367136710| 2009-01-02 22:13:00| -79.84115|25.618233000000004|2009-01-02 22:14:...|-79.840383|           25.6246|\n",
      "|367136710|2009-01-02 22:14:...|-79.840383|           25.6246| 2009-01-02 22:15:00|-79.839583|25.630849999999995|\n",
      "|367136710| 2009-01-02 22:15:00|-79.839583|25.630849999999995| 2009-01-02 22:16:00|-79.838783|25.637100000000004|\n",
      "|367136710| 2009-01-02 22:16:00|-79.838783|25.637100000000004|2009-01-02 22:17:...|-79.838017|25.643249999999995|\n",
      "|367136710|2009-01-02 22:17:...|-79.838017|25.643249999999995| 2009-01-02 22:18:00| -79.83725|25.649617000000006|\n",
      "|367136710| 2009-01-02 22:18:00| -79.83725|25.649617000000006| 2009-01-02 22:19:00|-79.836517|25.655766999999997|\n",
      "|367136710| 2009-01-02 22:19:00|-79.836517|25.655766999999997| 2009-01-02 22:20:00|-79.835733|25.662549999999996|\n",
      "|367136710| 2009-01-02 22:20:00|-79.835733|25.662549999999996| 2009-01-02 22:21:00|-79.834983|25.668932999999996|\n",
      "|367136710| 2009-01-02 22:21:00|-79.834983|25.668932999999996| 2009-01-02 22:22:00|-79.834267|25.675317000000007|\n",
      "|367136710| 2009-01-02 22:22:00|-79.834267|25.675317000000007|2009-01-02 22:23:...|-79.833583|         25.681517|\n",
      "|367136710|2009-01-02 22:23:...|-79.833583|         25.681517|2009-01-02 22:24:...| -79.83285|          25.68795|\n",
      "|367136710|2009-01-02 22:24:...| -79.83285|          25.68795| 2009-01-02 22:25:00|-79.832167|25.694166999999993|\n",
      "|367136710| 2009-01-02 22:25:00|-79.832167|25.694166999999993| 2009-01-02 22:26:00|-79.831467|25.700699999999998|\n",
      "|367136710| 2009-01-02 22:26:00|-79.831467|25.700699999999998|2009-01-02 22:27:...|-79.830783|25.706917000000004|\n",
      "|367136710|2009-01-02 22:27:...|-79.830783|25.706917000000004| 2009-01-02 22:28:00|-79.830133|25.713250000000002|\n",
      "|367136710| 2009-01-02 22:28:00|-79.830133|25.713250000000002| 2009-01-02 22:29:00|-79.829483|25.719466999999995|\n",
      "|367136710| 2009-01-02 22:29:00|-79.829483|25.719466999999995|2009-01-02 22:30:...|-79.828783|25.725899999999996|\n",
      "|367136710|2009-01-02 22:30:...|-79.828783|25.725899999999996| 2009-01-02 22:31:00|-79.828117|25.732117000000002|\n",
      "|367136710| 2009-01-02 22:31:00|-79.828117|25.732117000000002| 2009-01-02 22:32:00|  -79.8274|25.738550000000004|\n",
      "|367136710| 2009-01-02 22:32:00|  -79.8274|25.738550000000004|2009-01-02 22:33:...|-79.826717|25.744749999999996|\n",
      "|367136710|2009-01-02 22:33:...|-79.826717|25.744749999999996| 2009-01-02 22:34:00|-79.826033|25.751166999999995|\n",
      "|367136710| 2009-01-02 22:34:00|-79.826033|25.751166999999995| 2009-01-02 22:35:00|  -79.8254|25.757383000000004|\n",
      "|367136710| 2009-01-02 22:35:00|  -79.8254|25.757383000000004|2009-01-02 22:36:...|-79.824617|25.764432999999997|\n",
      "|367136710|2009-01-02 22:36:...|-79.824617|25.764432999999997| 2009-01-02 22:37:00|-79.823967|25.770633000000004|\n",
      "|367136710| 2009-01-02 22:37:00|-79.823967|25.770633000000004| 2009-01-02 22:38:00|-79.823233|25.777050000000003|\n",
      "+---------+--------------------+----------+------------------+--------------------+----------+------------------+\n",
      "only showing top 32 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sql(\"\"\"\n",
    "select mmsi,dt1,x1,y1,dt2,x2,y2\n",
    "from (\n",
    "select MMSI mmsi,\n",
    "BaseDateTime as dt1,\n",
    "Shape.x as x1,\n",
    "Shape.y as y1,\n",
    "lead(BaseDateTime) over (partition by MMSI order by BaseDateTime) as dt2,\n",
    "lead(Shape.x) over (partition by MMSI order by BaseDateTime) as x2,\n",
    "lead(Shape.y) over (partition by MMSI order by BaseDateTime) as y2\n",
    "from bc\n",
    ")\n",
    "where dt1 < dt2 and haversine(x1,y1,x2,y2) > 500.0\n",
    "\"\"\").show(32, truncate=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
